###############################
# Analysis of conjoint data 
###############################

rm(list=ls())

sink("001_conjoint_analysis.txt")

# load packages 
library(foreign)
library(car)
library(lmtest)
library(sandwich)
library(stargazer)
library(msm)
library(Hmisc)
library(readstata13)

# load function that does clustered SEs
vcovCluster <- function(
  model,
  cluster
)
{
  require(sandwich)
  require(lmtest)
  if(nrow(model.matrix(model))!=length(cluster)){
    stop("check your data: cluster variable has different N than model")
  }
  M <- length(unique(cluster))
  N <- length(cluster)           
  K <- model$rank   
  if(M<50){
    warning("Fewer than 50 clusters, variances may be unreliable (could try block bootstrap instead).")
  }
  dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
  uj  <- apply(estfun(model), 2, function(x) tapply(x, cluster, sum));
  rcse.cov <- dfc * sandwich(model, meat = crossprod(uj)/N)
  return(rcse.cov)
}

##############################
# Prepare data
##############################

# load data
d <- read.dta13("conjoint_Romania.dta")
head(d)
dim(d) 

d_rural = d[d$rural==1,]
dim(d_rural)
d_rural = d_rural[order(d_rural$code, decreasing=F), ]
table(d_rural$income)
d_rural$idnum2 = rep(1:254,each=10)
table(d_rural$idnum2)

d_urban = d[d$rural==0,]
dim(d_urban)
d_urban = d_urban[order(d_urban$code, decreasing=F), ]
table(d_urban$income)
d_urban$idnum2 = rep(1:248,each=10)
table(d_urban$idnum2)

d_income = d[d$income!=99,]
dim(d_income)
d_income = d_income[order(d_income$code, decreasing=F), ]
table(d_income$income)
d_income$idnum2 = rep(1:479,each=10)
table(d_income$idnum2)

###########################
## ACME using regression
###########################

# AMCE all respondents 
lm <- lm(outcome ~ atgen + atexp + atinti + atmita + atinte + atpol + atven,data=d)
lm_c <- round(coeftest(lm, vcov = vcovCluster(lm, cluster = d$idnum)),4)
lm_c

# BH correction 
p_female = lm_c[2,4]
p_incumbent = lm_c[3,4]
p_threat = lm_c[4,4]
p_100ron = lm_c[5,4]
p_social = lm_c[6,4]
p_investigated = lm_c[7,4]
p_sentenced = lm_c[8,4]
p_school = lm_c[9,4]
p_schoolroads = lm_c[10,4]
p_highincome = lm_c[11,4]

p_no_correction =  c(p_female,p_incumbent,p_threat,p_100ron,p_social,p_investigated,
                           p_sentenced,p_school,p_schoolroads,p_highincome)

p_correction_bh =  p.adjust(p_no_correction, "BH")
p_correction_bon =  p.adjust(p_no_correction, "bonferroni")

colnames = c("Female","Incumbent","Threat to non-supporters","100 RON","Social assistance","Investigated",
             "Sentenced","Renovate schools","Renovate schools and roads","High income")

p_no_correction = round(p_no_correction,4)
p_correction_bh = round(p_correction_bh,4)
p_correction_bon = round(p_correction_bon,4)
  
 # APPENDIX L
table = data.frame(colnames,p_no_correction,p_correction_bh,p_correction_bon)
colnames(table) <- c("Attributes", "No correction", "BH correction","Bon. correction")

stargazer(table, 
          type = "text",
          colnames = TRUE,
          summary = FALSE,
          title="P-values before and after multiple comparison correction", 
          digits=3, 
          rownames=FALSE, 
          float = TRUE, 
          float.env = "table", 
          table.placement = "H")

# Interaction urban 
lm2 <- lm(outcome ~ atgen + atexp + atinti + atmita + atinte + atpol + atven +
                       urban + 
                       atgen*urban + atexp*urban + atinti*urban + atmita*urban + atinte*urban + atpol*urban + atven*urban,
                       data=d)
lm2_c <- round(coeftest(lm2, vcov = vcovCluster(lm2, cluster = d$idnum)),4)

# Interaction rural
lm3 <- lm(outcome ~ atgen + atexp + atinti + atmita + atinte + atpol + atven +
               rural + 
               atgen*rural + atexp*rural + atinti*rural + atmita*rural + atinte*rural + atpol*rural + atven*rural,
               data=d)
lm3_c <- round(coeftest(lm3, vcov = vcovCluster(lm3, cluster = d$idnum)),4)

# Interaction less than 900 ron 
lm4 <- lm(outcome ~ atgen + atexp + atinti + atmita + atinte + atpol + atven +
                        less900ron + 
                        atgen*less900ron + atexp*less900ron + atinti*less900ron + atmita*less900ron + atinte*less900ron +  
                        atpol*less900ron + atven*less900ron,
                        data = d_income)
lm4_c <- round(coeftest(lm4, vcov = vcovCluster(lm4, cluster = d_income$idnum2)),4)

# Interaction more than 900 ron
lm5 <- lm(outcome ~ atgen + atexp + atinti + atmita + atinte + atpol + atven +
                        more900ron + 
                        atgen*more900ron + atexp*more900ron + atinti*more900ron + atmita*more900ron + atinte*more900ron +  
                        atpol*more900ron + atven*more900ron,
                        data=d_income)
lm5_c <- round(coeftest(lm5, vcov = vcovCluster(lm5, cluster = d_income$idnum2)),4)

# Check interactions between attributes: programmatic candidates
d$progcan = 1
d$progcan[d$atpol=="No promises"] = 0
table(d$progcan, exclude = NULL)

lm6 <- lm(outcome ~ atgen + atexp + atinti + atmita + atinte + progcan + atven +
                     atgen*progcan + atexp*progcan + atinti*progcan + atmita*progcan + 
                     atinte*progcan + atven*progcan, 
                     data=d)
lm6_c <- round(coeftest(lm6, vcov = vcovCluster(lm6, cluster = d$idnum)),4)

# Check interactions between attributes: non programmatic candidates
d$nonprogcan = 0
d$nonprogcan[d$atpol=="No promises"] = 1
table(d$nonprogcan, exclude = NULL)

lm7 <- lm(outcome ~ atgen + atexp + atinti + atmita + atinte + nonprogcan + atven + nonprogcan + 
                     atgen*nonprogcan + atexp*nonprogcan + atinti*nonprogcan + atmita*nonprogcan + 
                     atinte*nonprogcan + atven*nonprogcan, 
                     data=d)
lm7_c <- round(coeftest(lm7, vcov = vcovCluster(lm7, cluster = d$idnum)),4)

#############################################################
# Alternative CS computation to obtain confidence intervals 
#############################################################

# compute cse
get_CL_vcov<-function(model, cluster){
  require(sandwich, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  
  #calculate degree of freedom adjustment
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- model$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  
  #calculate the uj's
  uj  <- apply(estfun(model),2, function(x) tapply(x, cluster, sum))
  
  #use sandwich to get the var-covar matrix
  vcovCL <- dfc*sandwich(model, meat=crossprod(uj)/N)
  return(vcovCL)
} 

# call our new function and save the var-cov matrix output in an object
lm_c2 <- get_CL_vcov(lm, d$idnum)

# estimates and standard errors 
coeftest(lm,lm_c2)

# function to compute confidence intervals 
get_confint<-function(model, vcovCL){
  t<-qt(.975, model$df.residual)
  ct<-coeftest(model, vcovCL)
  est<-cbind(ct[,1], ct[,1]-t*ct[,2], ct[,1]+t*ct[,2])
  colnames(est)<-c("Estimate","LowerCI","UpperCI")
  return(est)
}

# APPENDIX D
# obtain the confidence interval using the function
conf_int = get_confint(lm,lm_c2)
conf_int = conf_int[-1,]

stargazer(conf_int,title="",
          type = "text",
          align=TRUE, 
          omit.stat=c("LL","ser","f"), 
          no.space=TRUE,  
          table.placement = "H")

#####################
# Prepare plots
#####################

# All respondents
lm_c # regression results
coeff = lm_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = lm_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2, ],baseline,results[3, ],
                 baseline,results[4:5, ],baseline,results[6:7, ],baseline,results[8:9, ],baseline,results[10, ]) # add baselines
rownames(results) = c("1b.atgen","2.atgen",
                      "1b.atexp","2.atexp",
                      "1b.atinti","2.atinti", 
                      "1b.atmita","2.atmita","3.atmita",
                      "1b.atinte","2.atinte","3.atinte",
                      "1b.atpol","2.atpol","3.atpol",
                      "1b.atven","2.atven")
results 
write.table(results, "results_502_subjects.txt", sep="\t")

# Rural respondents
lm2_c # regression results
coeff = lm2_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
coeff3 = coeff2[1:11,] 
c_se = lm2_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
c_se3 = c_se2[1:11,]
results = data.frame(coeff3,c_se3) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2, ],baseline,results[3, ],
                 baseline,results[4:5, ],baseline,results[6:7, ],baseline,results[8:9, ],baseline,results[10, ]) # add baselines
rownames(results) = c("1b.atgen","2.atgen",
                      "1b.atexp","2.atexp",
                      "1b.atinti","2.atinti", 
                      "1b.atmita","2.atmita","3.atmita",
                      "1b.atinte","2.atinte","3.atinte",
                      "1b.atpol","2.atpol","3.atpol",
                      "1b.atven","2.atven")
results 
write.table(results, "results_rural_subjects.txt", sep="\t")

# Urban respondents
lm3_c # regression results
coeff = lm3_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
coeff3 = coeff2[1:11,] 
c_se = lm2_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
c_se3 = c_se2[1:11,]
results = data.frame(coeff3,c_se3) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2, ],baseline,results[3, ],
                 baseline,results[4:5, ],baseline,results[6:7, ],baseline,results[8:9, ],baseline,results[10, ]) # add baselines
rownames(results) = c("1b.atgen","2.atgen",
                      "1b.atexp","2.atexp",
                      "1b.atinti","2.atinti", 
                      "1b.atmita","2.atmita","3.atmita",
                      "1b.atinte","2.atinte","3.atinte",
                      "1b.atpol","2.atpol","3.atpol",
                      "1b.atven","2.atven")
results 
write.table(results, "results_urban_subjects.txt", sep="\t")

# More than 900 RON respondents
lm4_c # regression results
coeff = lm4_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
coeff3 = coeff2[1:11,] 
c_se = lm4_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
c_se3 = c_se2[1:11,]
results = data.frame(coeff3,c_se3) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2, ],baseline,results[3, ],
                 baseline,results[4:5, ],baseline,results[6:7, ],baseline,results[8:9, ],baseline,results[10, ]) # add baselines
rownames(results) = c("1b.atgen","2.atgen",
                      "1b.atexp","2.atexp",
                      "1b.atinti","2.atinti", 
                      "1b.atmita","2.atmita","3.atmita",
                      "1b.atinte","2.atinte","3.atinte",
                      "1b.atpol","2.atpol","3.atpol",
                      "1b.atven","2.atven")
results 
write.table(results, "results_morethan900ron_subjects.txt", sep="\t")

# Less than RON respondents
lm5_c # regression results
coeff = lm5_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
coeff3 = coeff2[1:11,] 
c_se = lm5_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
c_se3 = c_se2[1:11,]
results = data.frame(coeff3,c_se3) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2, ],baseline,results[3, ],
                 baseline,results[4:5, ],baseline,results[6:7, ],baseline,results[8:9, ],baseline,results[10, ]) # add baselines
rownames(results) = c("1b.atgen","2.atgen",
                      "1b.atexp","2.atexp",
                      "1b.atinti","2.atinti", 
                      "1b.atmita","2.atmita","3.atmita",
                      "1b.atinte","2.atinte","3.atinte",
                      "1b.atpol","2.atpol","3.atpol",
                      "1b.atven","2.atven")
results 
write.table(results, "results_lessthan900ron_subjects.txt", sep="\t")

# Non-programmatic candidates
lm6_c
coeff = lm6_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
coeff3 = coeff2[1:11,] 
c_se = lm6_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
c_se3 = c_se2[1:11,]
results = data.frame(coeff3,c_se3) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept
results = results[-8,] # drop policy
results = results[-8,] # drop policy
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ],baseline,results[2, ],baseline,results[3, ],
                 baseline,results[4:5, ],baseline,results[6:7, ],baseline,results[8, ]) # add baselines
rownames(results) = c("1b.atgen","2.atgen",
                      "1b.atexp","2.atexp",
                      "1b.atinti","2.atinti", 
                      "1b.atmita","2.atmita","3.atmita",
                      "1b.atinte","2.atinte","3.atinte",
                      "1b.atven","2.atven")
results 
write.table(results, "results_nonprogrammatic_candidates.txt", sep="\t")

# Programmatic candidates
lm7_c
coeff = lm7_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
coeff3 = coeff2[1:11,] 
c_se = lm7_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
c_se3 = c_se2[1:11,]
results = data.frame(coeff3,c_se3) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept
results = results[-8,] # drop policy
results = results[-8,] # drop policy
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ],baseline,results[2, ],baseline,results[3, ],
                 baseline,results[4:5, ],baseline,results[6:7, ],baseline,results[8, ]) # add baselines
rownames(results) = c("1b.atgen","2.atgen",
                      "1b.atexp","2.atexp",
                      "1b.atinti","2.atinti", 
                      "1b.atmita","2.atmita","3.atmita",
                      "1b.atinte","2.atinte","3.atinte",
                      "1b.atven","2.atven")
results 
write.table(results, "results_programmatic_candidates.txt", sep="\t")

###########################
# Delta coefficients plots
###########################

# Urban interaction
lm2_c # regression results
coeff = lm2_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
coeff3 = coeff2[13:22,] 
c_se = lm2_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
c_se3 = c_se2[13:22,]
results = data.frame(coeff3,c_se3) # generate dataset 
colnames(results) = c("y1","r1")  # change column namesbaseline =  c(0,0) # gen baseline rows
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2, ],baseline,results[3, ],
                 baseline,results[4:5, ],baseline,results[6:7, ],baseline,results[8:9, ],baseline,results[10, ]) # add baselines
rownames(results) = c("1b.atgen","2.atgen",
                      "1b.atexp","2.atexp",
                      "1b.atinti","2.atinti", 
                      "1b.atmita","2.atmita","3.atmita",
                      "1b.atinte","2.atinte","3.atinte",
                      "1b.atpol","2.atpol","3.atpol",
                      "1b.atven","2.atven")
results 
write.table(results, "results_delta_rural_urban.txt", sep="\t")

# Less than 900 RON respondents interaction
lm4_c
coeff = lm4_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
coeff3 = coeff2[13:22,] 
c_se = lm4_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
c_se3 = c_se2[11:20,]
results = data.frame(coeff3,c_se3) # generate dataset 
colnames(results) = c("y1","r1")  # change column namesbaseline =  c(0,0) # gen baseline rows
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2, ],baseline,results[3, ],
                 baseline,results[4:5, ],baseline,results[6:7, ],baseline,results[8:9, ],baseline,results[10, ]) # add baselines
rownames(results) = c("1b.atgen","2.atgen",
                      "1b.atexp","2.atexp",
                      "1b.atinti","2.atinti", 
                      "1b.atmita","2.atmita","3.atmita",
                      "1b.atinte","2.atinte","3.atinte",
                      "1b.atpol","2.atpol","3.atpol",
                      "1b.atven","2.atven")
results  
write.table(results, "results_delta_more900_less900.txt", sep="\t")

# Programmatic candidate interaction
lm6_c
coeff = lm6_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
coeff3 = coeff2[11:18,] 
c_se = lm6_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
c_se3 = c_se2[11:18,]
results = data.frame(coeff3,c_se3) # generate dataset 
colnames(results) = c("y1","r1")  # change column namesbaseline =  c(0,0) # gen baseline rows
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2, ],baseline,results[3, ],
                 baseline,results[4:5, ],baseline,results[6:7, ],baseline,results[8, ]) # add baselines
rownames(results) = c("1b.atgen","2.atgen",
                      "1b.atexp","2.atexp",
                      "1b.atinti","2.atinti", 
                      "1b.atmita","2.atmita","3.atmita",
                      "1b.atinte","2.atinte","3.atinte",
                      "1b.atven","2.atven")
results  
write.table(results, "results_delta_nonprog_prog.txt", sep="\t")

#####################
# Regression tables
#####################

# APPENDIX C
dim(d)
lm_c
stargazer(lm_c,title="",
          type = "text",
          align=TRUE, 
          omit.stat=c("LL","ser","f"), 
          covariate.labels=c("Female","Incumbent","Threat to non-supporters","100 RON","Social assistance","Investigated",
                             "Sentenced","Renovate schools","Renovate schools and roads","High income"),
          no.space=TRUE,  
          table.placement = "H",
          #single.row = TRUE,
          star.cutoffs = c(0.05,0.01,0.001),
          dep.var.caption  = "Outcome",
          dep.var.labels   = "Electoral Choice",
          add.lines = list(c("Respondents","502"),
                           c("Observations","5020","5020","5020","5020")))
                           
# APPENDIX K FIRST TABLE
dim(d)
lm6_c
stargazer(lm6_c,title="",
          type = "text",
          align=TRUE, 
          omit.stat=c("LL","ser","f"), 
          covariate.labels=c("Female","Incumbent","Threat to non-supporters","100 RON","Social assistance","Investigated",
                             "Sentenced","Programmatic","High income",
                             "Female*Programmatic","Incumbent*Programmatic","Threat to non-supporters*Programmatic",
                             "100 RON*Programmatic","Social assistance*Programmatic","Investigated*Programmatic",
                             "Sentenced*Programmatic","High income*Programmatic"),
          no.space=TRUE,  
          table.placement = "H",
          #single.row = TRUE,
          star.cutoffs = c(0.05,0.01,0.001),
          dep.var.caption  = "Outcome",
          dep.var.labels   = "Electoral Choice",
          add.lines = list(c("Respondents","502"),
                           c("Observations","5020","5020","5020","5020")))
          
# APPENDIX K SECOND TABLE
dim(d)
lm2_c
stargazer(lm2_c,title="",
          type = "text",
          align=TRUE, 
          omit.stat=c("LL","ser","f"), 
          covariate.labels=c("Female","Incumbent","Threat to non-supporters","100 RON","Social assistance","Investigated",
                             "Sentenced","Renovate schools","Renovate schools and roads","High income",
                             "Urban","Female*Urban","Incumbent*Urban","Threat to non-supporters*Urban","100 RON*Urban",
                             "Social assistance*Urban","Investigated*Urban","Sentenced*Urban","Renovate schools*Urban",
                             "Renovate schools and roads*Urban","High income*Urban"),
          no.space=TRUE,  
          table.placement = "H",
          #single.row = TRUE,
          star.cutoffs = c(0.05,0.01,0.001),
          dep.var.caption  = "Outcome",
          dep.var.labels   = "Electoral Choice",
          add.lines = list(c("Respondents","502"),
                           c("Observations","5020","5020","5020","5020")))

# APPENDIX K THIRD TABLE
dim(d_income)
lm4_c
stargazer(lm4_c,title="",
          type = "text",
          align=TRUE, 
          omit.stat=c("LL","ser","f"), 
          covariate.labels=c("Female","Incumbent","Threat to non-supporters","100 RON","Social assistance","Investigated",
                             "Sentenced","Renovate schools","Renovate schools and roads","High income",
                             "Less 900 RON","Female*Less 900 RON","Incumbent*Less 900 RON",
                             "Threat to non-supporters*Less 900 RON","100 RON*Less 900 RON",
                             "Social assistance*Less 900 RON","Investigated*Less 900 RON","Sentenced*Less 900 RON",
                             "Renovate schools*Less 900 RON","Renovate schools and roads*Less 900 RON","High income*Less 900 RON"),
          no.space=TRUE,  
          table.placement = "H",
          #single.row = TRUE,
          star.cutoffs = c(0.05,0.01,0.001),
          dep.var.caption  = "Outcome",
          dep.var.labels   = "Electoral Choice",
          add.lines = list(c("Respondents","479"),
                           c("Observations","4790","4790","4790","4790")))

sink()

